/*          
    Purpose: This file creates a "recall" matrix.
    		 Rows    = father's occupation as recalled by the adult child
    		           (recall taken when the child is between ages 30 and 50).
    		 Columns = father's own modal reported occupation
    		           when the father was between ages 30 and 50.
    		 Cells   = percent of each row falling in each column.

    Creates: PSID_recall_matrix.dta
    		 PSID_recall_matrix_long.dta
*/

clear
set more off

* Every relative path below is resolved against the PSID data-source folder
cd "$Mydirectory1/1_DataSources/PSID"

**-----------------------------------------------------------------------------**
**-----------------------------------------------------------------------------**

**************************************
*** MERGE ADULT CHILDREN AND FATHERS
**************************************

	* Master data: the sons' retrospective file
	use ./output/PSID_sons_retrospective.dta, clear

	* Attach the fathers' modal-occupation file by father_id.
	* keep(match using) discards master-only rows (those that would have
	* _merge==1); nogenerate means no _merge variable is left behind.
	merge m:1 father_id using ./output/Fathers_modaloccs.dta, keep(match using) nogenerate

	sort son_id year

	* Retain a single record per adult child
	* (firstobs_son is presumably a first-record flag built upstream -- confirm)
	keep if firstobs_son == 1

	* Trim to the variables used below
	keep mode_occ_* mode_occ_son byr divorced race_son father_id son_id
	
* Generate recall matrix
	/*Note: rows (wide format) = adult child's recalled father occupation (when child b/w 30 and 50)
			cols (wide format) = father's modal actual reported occ between 30 and 50

	  Each cell father_occ_Y, on the observations whose recalled occupation is X,
	  holds 100 * (# obs with recalled X and actual Y) / (# obs with recalled X).
	*/

	//Choose variables
	local son_inc "mode_occ_son30to50_max"
	local father_inc "mode_occ_30to50_max"

	* Only observations with both the recalled and the actual occupation are usable
	sort `son_inc' `father_inc'
	keep if `son_inc'!=. & `father_inc'!=.

	* Row size: number of observations sharing the same recalled occupation
	by `son_inc': gen total_obs = _N

	* Make matrix variables to be filled in (complete list of occupation codes)
	local occ_list "1 2 3 5 6 7 8 9 10 17 18 28 30 31 32 34 35 36 42 48 51 61 65 68 71 78 81"
	foreach x of local occ_list {
		gen father_occ_`x'=.
	}

	* Occupation codes actually present in the data
	levelsof `son_inc' , local(sons_occ)
	levelsof  `father_inc', local(fathers_occ)

	* Fail early, with a clear message, if the data contain a father occupation
	* code that has no column; otherwise the fill loop below would stop part-way
	* with a generic "variable not found" / "ambiguous abbreviation" error.
	foreach y of local fathers_occ {
		capture confirm variable father_occ_`y', exact
		if _rc {
			display as error "father occupation code `y' is present in `father_inc' but missing from occ_list"
			exit 111
		}
	}

	* Fill in using percentages
	foreach x of local sons_occ {
		* The row denominator depends only on x, so compute it once per row
		quietly count if `son_inc'==`x'
		local denom = r(N)

		foreach y of local fathers_occ {
			quietly count if `father_inc'==`y' & `son_inc'==`x'
			local num = r(N)
			local ratio = (`num' / `denom')*100

			replace father_occ_`y' = `ratio' if `son_inc'==`x'
		}
	}

	* Sanity check: the shares in each row should add up to 100
	* (rowtotal treats still-missing cells as zero)
	egen total_sum = rowtotal(father_occ_*)
	tab total_sum

	* Keep 1 observation per recalled occupation (one matrix row each)
	bysort `son_inc': keep if _n==1
	keep `son_inc' father_occ_* total_obs

	* Display the diagonal: share whose recalled occupation equals the father's actual one
	foreach x of local sons_occ {
		tab father_occ_`x' if `son_inc'==`x'
	}

	* Drop every matrix column whose cells are all missing.
	* This is decided from the data rather than hard-coding a column name, so a
	* populated column is never discarded and no empty column is left behind.
	foreach v of varlist father_occ_* {
		quietly count if !missing(`v')
		if r(N)==0 {
			drop `v'
		}
	}

	rename `son_inc' fatheroccej

	save ./output/PSID_recall_matrix.dta, replace

	* Make long version of matrix: one row per (recalled occ, actual occ) pair
	reshape long father_occ_, i(fatheroccej total_obs) j(father_numb)

	rename father_occ_ father_share
	rename father_numb father_occ

	save ./output/PSID_recall_matrix_long.dta, replace

	
